{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Project Solution: Goal 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's what we wrote in Goal 1:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import namedtuple\n",
    "from datetime import datetime\n",
    "from functools import partial\n",
    "\n",
    "file_name = 'nyc_parking_tickets_extract.csv'\n",
    "\n",
    "with open(file_name) as f:\n",
    "    column_headers = next(f).strip('\\n').split(',')\n",
    "    \n",
    "column_names = [header.replace(' ', '_').lower() \n",
    "                for header in column_headers]\n",
    "\n",
    "Ticket = namedtuple('Ticket', column_names)\n",
    "\n",
    "def read_data():\n",
    "    \"\"\"Lazily yield the raw lines of the data file, skipping the header line.\"\"\"\n",
    "    with open(file_name) as f:\n",
    "        next(f)\n",
    "        yield from f\n",
    "        \n",
    "def parse_int(value, *, default=None):\n",
    "    \"\"\"Return `value` converted to an int, or `default` if it cannot be converted.\"\"\"\n",
    "    try:\n",
    "        return int(value)\n",
    "    except (ValueError, TypeError):\n",
    "        # TypeError covers non-string input such as None\n",
    "        return default\n",
    "    \n",
    "def parse_date(value, *, default=None):\n",
    "    \"\"\"Return `value` (in mm/dd/yyyy format) as a date, or `default` if it cannot be parsed.\"\"\"\n",
    "    date_format='%m/%d/%Y'\n",
    "    try:\n",
    "        return datetime.strptime(value, date_format).date()\n",
    "    except (ValueError, TypeError):\n",
    "        # TypeError covers non-string input such as None\n",
    "        return default\n",
    "    \n",
    "def parse_string(value, *, default=None):\n",
    "    \"\"\"Return `value` as a stripped string, or `default` if the stripped string is empty.\"\"\"\n",
    "    try:\n",
    "        cleaned = str(value).strip()\n",
    "    except Exception:\n",
    "        # catch Exception rather than using a bare except, which would\n",
    "        # also swallow KeyboardInterrupt and SystemExit\n",
    "        return default\n",
    "    if not cleaned:\n",
    "        # empty string\n",
    "        return default\n",
    "    return cleaned\n",
    "    \n",
    "column_parsers = (parse_int,  # summons_number, default is None\n",
    "                  parse_string,  # plate_id, default is None\n",
    "                  partial(parse_string, default=''),  # state\n",
    "                  partial(parse_string, default=''),  # plate_type\n",
    "                  parse_date,  # issue_date, default is None\n",
    "                  parse_int,  # violation_code\n",
    "                  partial(parse_string, default=''),  # body type\n",
    "                  parse_string,  # make, default is None\n",
    "                  lambda x: parse_string(x, default='')  # description\n",
    "                 )\n",
    "\n",
    "def parse_row(row, *, default=None):\n",
    "    \"\"\"Parse one comma-delimited line into a `Ticket`.\n",
    "\n",
    "    Each field is passed to the parser at the same position in\n",
    "    `column_parsers`. Returns `default` if any field parses to None.\n",
    "    \"\"\"\n",
    "    fields = row.strip('\\n').split(',')\n",
    "    # note that I'm using a list comprehension here, \n",
    "    # since we'll need to iterate through the entire parsed fields\n",
    "    # twice - one time to check if nothing is None\n",
    "    # and another time to create the named tuple\n",
    "    parsed_data = [func(field) \n",
    "                   for func, field in zip(column_parsers, fields)]\n",
    "    if all(item is not None for item in parsed_data):\n",
    "        return Ticket(*parsed_data)\n",
    "    else:\n",
    "        return default\n",
    "    \n",
    "def parsed_data():\n",
    "    \"\"\"Yield a `Ticket` for every data row that parses without a None field.\"\"\"\n",
    "    for row in read_data():\n",
    "        parsed = parse_row(row)\n",
    "        # parse_row signals a rejected row with None, so test for that\n",
    "        # explicitly instead of relying on the truthiness of the tuple\n",
    "        if parsed is not None:\n",
    "            yield parsed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Goal 2: Calculating Number of Violations by Car Make"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What we want to do here is iterate through the file and keep a counter, for each make, of how many rows for that make were encountered.\n",
    "\n",
    "A good approach is to use a dictionary to keep track of the makes (as keys), and the value can be a counter that is initialized to 1 if the key (make) does not already exist, or incremented by 1 if it does.\n",
    "\n",
    "We could do this using regular dictionaries first:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TOYOT 112\n",
      "HONDA 106\n",
      "FORD 104\n",
      "CHEVR 76\n",
      "NISSA 70\n",
      "DODGE 45\n",
      "FRUEH 44\n",
      "ME/BE 38\n",
      "GMC 35\n",
      "HYUND 35\n",
      "BMW 34\n",
      "LEXUS 26\n",
      "INTER 25\n",
      "JEEP 22\n",
      "NS/OT 18\n",
      "SUBAR 18\n",
      "INFIN 13\n",
      "LINCO 12\n",
      "CHRYS 12\n",
      "ACURA 12\n",
      "AUDI 12\n",
      "VOLVO 12\n",
      "MITSU 11\n",
      "ISUZU 10\n",
      "CADIL 9\n",
      "KIA 8\n",
      "VOLKS 8\n",
      "HIN 6\n",
      "KENWO 5\n",
      "ROVER 5\n",
      "BUICK 5\n",
      "MAZDA 5\n",
      "MERCU 4\n",
      "JAGUA 3\n",
      "SMART 3\n",
      "PORSC 3\n",
      "WORKH 2\n",
      "SATUR 2\n",
      "SCION 2\n",
      "SAAB 2\n",
      "HINO 2\n",
      "FIR 1\n",
      "OLDSM 1\n",
      "PETER 1\n",
      "CITRO 1\n",
      "GEO 1\n",
      "YAMAH 1\n",
      "BSA 1\n",
      "MINI 1\n",
      "PONTI 1\n",
      "SPRI 1\n",
      "PLYMO 1\n",
      "UPS 1\n",
      "FIAT 1\n",
      "UD 1\n",
      "UTILI 1\n",
      "GMCQ 1\n",
      "STAR 1\n",
      "AM/T 1\n",
      "MI/F 1\n"
     ]
    }
   ],
   "source": [
    "makes_counts = {}\n",
    "\n",
    "for data in parsed_data():\n",
    "    if data.vehicle_make in makes_counts:\n",
    "        makes_counts[data.vehicle_make] += 1\n",
    "    else:\n",
    "        makes_counts[data.vehicle_make] = 1\n",
    "        \n",
    "for make, cnt in sorted(makes_counts.items(), \n",
    "                        key=lambda t: t[1], \n",
    "                        reverse=True):\n",
    "    print(make, cnt)\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also make use of a special type of dictionary called a `defaultdict`. The way a `defaultdict` works is that if you try to retrieve a non-existent key from the dictionary, it will add that key and return a **default** value. It does need to know how to create that default, so we provide a callable for it - typically a data type such as `str` or `int`, which is called with no arguments to produce the default.\n",
    "\n",
    "Let's take a look:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = defaultdict(str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d['a'] = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['a']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['b']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As you can see it returned an empty string.\n",
    "\n",
    "In our case, we want to use it to count, so we can make our default be integers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = defaultdict(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d['a'] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['b']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "So, if we want to either set a key's value to `1` if it does not already exist, or increment it by `1` if it does, it's quite simple. In both cases, we just need to retrieve the key's value, and increment by 1:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = defaultdict(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d['make1'] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['make1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d['make1'] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['make1']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "So, we could simplify our counter algorithm using a default dict:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TOYOT 112\n",
      "HONDA 106\n",
      "FORD 104\n",
      "CHEVR 76\n",
      "NISSA 70\n",
      "DODGE 45\n",
      "FRUEH 44\n",
      "ME/BE 38\n",
      "GMC 35\n",
      "HYUND 35\n",
      "BMW 34\n",
      "LEXUS 26\n",
      "INTER 25\n",
      "JEEP 22\n",
      "NS/OT 18\n",
      "SUBAR 18\n",
      "INFIN 13\n",
      "LINCO 12\n",
      "CHRYS 12\n",
      "ACURA 12\n",
      "AUDI 12\n",
      "VOLVO 12\n",
      "MITSU 11\n",
      "ISUZU 10\n",
      "CADIL 9\n",
      "KIA 8\n",
      "VOLKS 8\n",
      "HIN 6\n",
      "KENWO 5\n",
      "ROVER 5\n",
      "BUICK 5\n",
      "MAZDA 5\n",
      "MERCU 4\n",
      "JAGUA 3\n",
      "SMART 3\n",
      "PORSC 3\n",
      "WORKH 2\n",
      "SATUR 2\n",
      "SCION 2\n",
      "SAAB 2\n",
      "HINO 2\n",
      "FIR 1\n",
      "OLDSM 1\n",
      "PETER 1\n",
      "CITRO 1\n",
      "GEO 1\n",
      "YAMAH 1\n",
      "BSA 1\n",
      "MINI 1\n",
      "PONTI 1\n",
      "SPRI 1\n",
      "PLYMO 1\n",
      "UPS 1\n",
      "FIAT 1\n",
      "UD 1\n",
      "UTILI 1\n",
      "GMCQ 1\n",
      "STAR 1\n",
      "AM/T 1\n",
      "MI/F 1\n"
     ]
    }
   ],
   "source": [
    "makes_counts = defaultdict(int)\n",
    "\n",
    "for data in parsed_data():\n",
    "    makes_counts[data.vehicle_make] += 1\n",
    "    \n",
    "for make, cnt in sorted(makes_counts.items(), \n",
    "                        key=lambda t: t[1], \n",
    "                        reverse=True):\n",
    "    print(make, cnt)\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To wrap up this goal, let's make a function that will return that dictionary, and while we're at it, we'll order the dictionary's keys by descending count. (Remember that dictionaries maintain the insertion order of their keys in Python 3.6+ - as an implementation detail of CPython 3.6 and as a language guarantee from Python 3.7 onward - so we don't need to use an `OrderedDict`.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def violation_counts_by_make():\n",
    "    \"\"\"Return a dict mapping each vehicle make to its number of violations.\n",
    "\n",
    "    The keys are inserted in descending order of count, so iterating over\n",
    "    the result visits the makes with the most violations first.\n",
    "    \"\"\"\n",
    "    tally = defaultdict(int)\n",
    "    for ticket in parsed_data():\n",
    "        tally[ticket.vehicle_make] += 1\n",
    "\n",
    "    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)\n",
    "    return dict(ranked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'TOYOT': 112, 'HONDA': 106, 'FORD': 104, 'CHEVR': 76, 'NISSA': 70, 'DODGE': 45, 'FRUEH': 44, 'ME/BE': 38, 'GMC': 35, 'HYUND': 35, 'BMW': 34, 'LEXUS': 26, 'INTER': 25, 'JEEP': 22, 'NS/OT': 18, 'SUBAR': 18, 'INFIN': 13, 'LINCO': 12, 'CHRYS': 12, 'ACURA': 12, 'AUDI': 12, 'VOLVO': 12, 'MITSU': 11, 'ISUZU': 10, 'CADIL': 9, 'KIA': 8, 'VOLKS': 8, 'HIN': 6, 'KENWO': 5, 'ROVER': 5, 'BUICK': 5, 'MAZDA': 5, 'MERCU': 4, 'JAGUA': 3, 'SMART': 3, 'PORSC': 3, 'WORKH': 2, 'SATUR': 2, 'SCION': 2, 'SAAB': 2, 'HINO': 2, 'FIR': 1, 'OLDSM': 1, 'PETER': 1, 'CITRO': 1, 'GEO': 1, 'YAMAH': 1, 'BSA': 1, 'MINI': 1, 'PONTI': 1, 'SPRI': 1, 'PLYMO': 1, 'UPS': 1, 'FIAT': 1, 'UD': 1, 'UTILI': 1, 'GMCQ': 1, 'STAR': 1, 'AM/T': 1, 'MI/F': 1}\n"
     ]
    }
   ],
   "source": [
    "print(violation_counts_by_make())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
